TensorScatterAdd
在输入张量的指定位置执行加法操作。 根据给定的 indices 和 updates,在输入张量中相应的位置将更新值加到原值上,生成新的输出张量。
\[output[indices] = input[indices] + updates\]
- 输入:
input - 输入张量数据地址。
input_shape - 输入张量形状数组。
input_rank - 输入张量维度数。
indices - 指定更新位置的索引数组。
updates - 更新数据地址。
num_unit - 每个更新单元的长度。
index_depth - 索引深度(indices 的最后一维长度)。
output_unit_offsets(int*, 可选) - 单元偏移数组(仅私有存储版本使用)。
strides(int*, 可选) - 步长数组(仅私有存储版本使用)。
core_mask(int, 可选) - 核掩码(仅共享存储版本使用)。
- 输出:
output - 输出张量数据地址,存放更新后的结果。
- 支持平台:
FT78NE MT7004
备注:
FT78NE 支持 int8, int16, int32, fp32, fp64, cplx64, cplx128
MT7004 支持 fp16, fp32, int16, int32, cplx64
共享存储版本:
-
void fp_tensor_scatter_add_s(float *input, int *input_shape, int input_rank, int *indices, float *updates, float *output, int num_unit, int index_depth, int core_mask)
-
void i32_tensor_scatter_add_s(int32_t *input, int *input_shape, int input_rank, int *indices, int32_t *updates, int32_t *output, int num_unit, int index_depth, int core_mask)
-
void i16_tensor_scatter_add_s(int16_t *input, int *input_shape, int input_rank, int *indices, int16_t *updates, int16_t *output, int num_unit, int index_depth, int core_mask)
-
void i8_tensor_scatter_add_s(int8_t *input, int *input_shape, int input_rank, int *indices, int8_t *updates, int8_t *output, int num_unit, int index_depth, int core_mask)
-
void hp_tensor_scatter_add_s(half *input, int *input_shape, int input_rank, int *indices, half *updates, half *output, int num_unit, int index_depth, int core_mask)
-
void dp_tensor_scatter_add_s(double *input, int *input_shape, int input_rank, int *indices, double *updates, double *output, int num_unit, int index_depth, int core_mask)
-
void c64_tensor_scatter_add_s(float *input, int *input_shape, int input_rank, int *indices, float *updates, float *output, int num_unit, int index_depth, int core_mask)
-
void c128_tensor_scatter_add_s(double *input, int *input_shape, int input_rank, int *indices, double *updates, double *output, int num_unit, int index_depth, int core_mask)
C调用示例:
// FT78NE 示例
#include <stdio.h>

int main(int argc, char* argv[]) {
    float *input = (float *)0xA0000000;
    float *output = (float *)0xA1000000;
    int input_shape[2] = {4, 4};
    int input_rank = 2;
    int indices[2] = {1, 2};
    float updates[1] = {3.14};
    int num_unit = 1;
    int index_depth = 2;
    int core_mask = 0xff;
    fp_tensor_scatter_add_s(input, input_shape, input_rank, indices, updates, output, num_unit, index_depth, core_mask);
    return 0;
}
私有存储版本:
-
void fp_tensor_scatter_add_p(float *input, int *input_shape, int input_rank, int *indices, float *updates, float *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
-
void i32_tensor_scatter_add_p(int32_t *input, int *input_shape, int input_rank, int *indices, int32_t *updates, int32_t *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
-
void i16_tensor_scatter_add_p(int16_t *input, int *input_shape, int input_rank, int *indices, int16_t *updates, int16_t *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
-
void i8_tensor_scatter_add_p(int8_t *input, int *input_shape, int input_rank, int *indices, int8_t *updates, int8_t *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
-
void hp_tensor_scatter_add_p(half *input, int *input_shape, int input_rank, int *indices, half *updates, half *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
-
void dp_tensor_scatter_add_p(double *input, int *input_shape, int input_rank, int *indices, double *updates, double *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
-
void c64_tensor_scatter_add_p(float *input, int *input_shape, int input_rank, int *indices, float *updates, float *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
-
void c128_tensor_scatter_add_p(double *input, int *input_shape, int input_rank, int *indices, double *updates, double *output, int num_unit, int index_depth, int *output_unit_offsets, int *strides)
C调用示例:
// FT78NE 示例
#include <stdio.h>

int main(int argc, char* argv[]) {
    float *input = (float *)0x10000000;
    float *output = (float *)0x10010000;
    int *output_unit_offsets = (int *)0x10020000;
    int *strides = (int *)0x10030000;
    int input_shape[2] = {4, 4};
    int input_rank = 2;
    int indices[2] = {0, 1};
    float updates[1] = {2.71};
    int num_unit = 1;
    int index_depth = 2;
    fp_tensor_scatter_add_p(input, input_shape, input_rank, indices, updates, output, num_unit, index_depth, output_unit_offsets, strides);
    return 0;
}